# load required libraries
library(gplite)
## This is gplite version 0.13.0
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Fitting with `gp_optim`. Generate data for the function to which the model will be fit.
# Generate n noiseless observations of y = x^2, evenly spaced on [-2, 2]
n <- 5
x <- matrix(seq(from = -2, to = 2, length.out = n), ncol = 1)
y <- x * x
Plot the data.
# Scatter plot of the raw observations
ggplot() +
  geom_point(aes(x = x, y = y), size = 2) +
  labs(x = 'x', y = 'y')
Initialize the GP model.
# Specify the GP model we want to use.
# Squared exponential (aka Gaussian aka RBF) kernel with log-uniform
# priors on both the length-scale and the magnitude.
kernel <- cf_sexp(
  vars = NULL,
  lscale = 0.3,
  magn = 1,
  prior_lscale = prior_logunif(),
  prior_magn = prior_logunif(),
  normalize = FALSE
)
# Gaussian observation model (noise sd 0.5, log-uniform prior)
likelihood <- lik_gaussian(sigma = 0.5, prior_sigma = prior_logunif())
# Exact GP: use the full covariance matrix (i.e., do not approximate)
gp_empty <- gp_init(cfs = kernel, lik = likelihood, method = method_full())
Optimize the model using the internal function.
# Now fit the model to the data by optimizing the hyperparameters
gp_optimized <- gp_optim(gp = gp_empty, x = x, y = y, verbose = FALSE)
Plot the model.
# compute the predictive mean and variance in a grid of points
xt <- seq(-4, 4, length.out = 150)  # spell out length.out; avoid partial matching via `len`
pred <- gp_pred(gp_optimized, xt, var = TRUE)  # TRUE, not T (T is reassignable)
# visualize the predictive mean with a +/- 2 sd band
mu <- pred$mean
lb <- pred$mean - 2 * sqrt(pred$var)
ub <- pred$mean + 2 * sqrt(pred$var)
ggplot() +
  geom_ribbon(aes(x = xt, ymin = lb, ymax = ub), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')
Pull the energy.
# Energy = -log(marginal likelihood) of the fitted model; lower is better
gp_energy(gp_optimized)
## [1] 11.887
Increase the number of restarts.
# Now fit the model to the data, giving the optimizer 10 random restarts
gp_optimized_10 <- gp_optim(gp_empty, x, y, restarts = 10, verbose = FALSE)
Plot the model fitted with the larger number of restarts.
# compute the predictive mean and variance in a grid of points
pred <- gp_pred(gp_optimized_10, xt, var = TRUE)  # TRUE, not T (T is reassignable)
# visualize the predictive mean with a +/- 2 sd band
mu <- pred$mean
lb <- pred$mean - 2 * sqrt(pred$var)
ub <- pred$mean + 2 * sqrt(pred$var)
ggplot() +
  geom_ribbon(aes(x = xt, ymin = lb, ymax = ub), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')
Pull the energy.
# Same energy as with a single run -- the optimum was already found
gp_energy(gp_optimized_10)
## [1] 11.887
Increase to 1000 restarts.
# Now fit the model to the data, giving the optimizer 1000 random restarts
gp_optimized_1000 <- gp_optim(gp_empty, x, y, restarts = 1000, verbose = FALSE)
Plot the model fitted with the larger number of restarts.
# compute the predictive mean and variance in a grid of points
pred <- gp_pred(gp_optimized_1000, xt, var = TRUE)  # TRUE, not T (T is reassignable)
# visualize the predictive mean with a +/- 2 sd band
mu <- pred$mean
lb <- pred$mean - 2 * sqrt(pred$var)
ub <- pred$mean + 2 * sqrt(pred$var)
ggplot() +
  geom_ribbon(aes(x = xt, ymin = lb, ymax = ub), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')
Pull the energy.
# Still the same energy -- more restarts do not find a better optimum
gp_energy(gp_optimized_1000)
## [1] 11.887
Increase to 1,000,000 restarts.
# Now fit the model to the data, giving the optimizer 1,000,000 random restarts
gp_optimized_1e6 <- gp_optim(gp_empty, x, y, restarts = 1e6, verbose = FALSE)
Plot the model fitted with the larger number of restarts.
# compute the predictive mean and variance in a grid of points
pred <- gp_pred(gp_optimized_1e6, xt, var = TRUE)  # TRUE, not T (T is reassignable)
# visualize the predictive mean with a +/- 2 sd band
mu <- pred$mean
lb <- pred$mean - 2 * sqrt(pred$var)
ub <- pred$mean + 2 * sqrt(pred$var)
ggplot() +
  geom_ribbon(aes(x = xt, ymin = lb, ymax = ub), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')
Pull the energy.
# Energy unchanged even with a million restarts
gp_energy(gp_optimized_1e6)
## [1] 11.887
# Hyperparameter grid: length-scale (ell) x signal magnitude (sigma_f)
ell <- seq(0.5, 3, by = 0.05)
sigma_f <- seq(2, 8, by = 0.05)
hyperparams <- expand.grid(
  ell = ell,
  sigma_f = sigma_f
)
# preallocate the energy vector (growing with c() copies on every append)
energy <- numeric(nrow(hyperparams))
# run 6000+ models, one per grid point, with fixed noise sd = 0.01
for (i in seq_len(nrow(hyperparams))) {
  gp_empty <- gp_init(
    # A squared exponential (aka Gaussian aka RBF) kernel
    cfs = cf_sexp(
      vars = NULL,
      lscale = hyperparams$ell[i],
      magn = hyperparams$sigma_f[i],
      prior_lscale = prior_logunif(),
      prior_magn = prior_logunif(),
      normalize = FALSE
    ),
    # Assume Gaussian distributed errors
    lik = lik_gaussian(
      sigma = 0.01,
      prior_sigma = prior_logunif()
    ),
    # Use the full covariance (i.e., do not approximate)
    method = method_full()
  )
  # Condition on the data without optimizing, then record the energy
  gp_raw <- gp_fit(gp_empty, x, y)
  energy[i] <- gp_energy(gp_raw)
}
# Attach the energies to the grid and report the minimum-energy setting
surface_plot <- cbind(hyperparams, energy)
surface_plot[which.min(surface_plot$energy),]
## ell sigma_f energy
## 4212 1.95 6.1 11.05812
Model with \(\ell = 2\) and \(\sigma_f^2 = 6\), near the optimal values. Recall that we are setting \(\sigma_n^2 = 0.01\).
# Specify the GP model we want to use,
# fixing lscale and magn to the near-optimal grid values found above
kernel_opt <- cf_sexp(
  vars = NULL,
  lscale = 2,
  magn = 6,
  prior_lscale = prior_logunif(),
  prior_magn = prior_logunif(),
  normalize = FALSE
)
gp_empty <- gp_init(
  # Squared exponential (aka Gaussian aka RBF) kernel
  cfs = kernel_opt,
  # Gaussian errors with a small fixed noise sd
  lik = lik_gaussian(sigma = 0.01, prior_sigma = prior_logunif()),
  # Full covariance, no approximation
  method = method_full()
)
# Condition on the data without optimizing the hyperparameters
gp_raw_ell2sigma6 <- gp_fit(gp_empty, x, y)
Plot the model.
# compute the predictive mean and variance in a grid of points
pred_ell2sigma6 <- gp_pred(gp_raw_ell2sigma6, xt, var = TRUE)  # TRUE, not T
# visualize the predictive mean with a +/- 2 sd band
mu_ell2sigma6 <- pred_ell2sigma6$mean
lb_ell2sigma6 <- pred_ell2sigma6$mean - 2 * sqrt(pred_ell2sigma6$var)
ub_ell2sigma6 <- pred_ell2sigma6$mean + 2 * sqrt(pred_ell2sigma6$var)
ggplot() +
  geom_ribbon(aes(x = xt, ymin = lb_ell2sigma6, ymax = ub_ell2sigma6), fill = 'lightgray') +
  geom_line(aes(x = xt, y = mu_ell2sigma6), linewidth = 0.5) +
  geom_point(aes(x = x, y = y), size = 2) +
  xlab('x') + ylab('y')
Plot the entire surface.
# Derive the grid dimensions from the data instead of hard-coding 51 x 121,
# so the plot stays correct if the seq() grids above are changed
n_ell <- length(unique(surface_plot$ell))
n_sigma_f <- length(unique(surface_plot$sigma_f))
fig <- plot_ly() %>%
  add_trace(
    x = unique(surface_plot$sigma_f),
    y = unique(surface_plot$ell),
    # expand.grid varies ell fastest, so rows index ell and columns sigma_f
    z = matrix(surface_plot$energy, nrow = n_ell, ncol = n_sigma_f),
    type = "surface") %>%
  add_trace(
    # mark the minimum-energy grid point found above
    x = 6.1,
    y = 1.95,
    z = 11.05812,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  layout(
    scene = list(
      aspectmode = 'manual', # Set to manual for custom aspect ratio
      aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
      xaxis = list(
        title = "signal variance"
      ),
      yaxis = list(
        title = "length-scale parameter"
      ),
      zaxis = list(
        title = "-log(marginal likelihood)"
      )
    )
  )
fig
Plot the surface nearest the optimum value.
# Zoom in on the region around the optimum; derive full grid dims from the
# data instead of hard-coding 51 x 121
n_ell <- length(unique(surface_plot$ell))
n_sigma_f <- length(unique(surface_plot$sigma_f))
energy_grid <- matrix(surface_plot$energy, nrow = n_ell, ncol = n_sigma_f)
fig <- plot_ly() %>%
  add_trace(
    x = unique(surface_plot$sigma_f)[80:121],
    y = unique(surface_plot$ell)[10:51],
    z = energy_grid[10:51, 80:121],
    type = "surface") %>%
  add_trace(
    # mark the minimum-energy grid point found above
    x = 6.1,
    y = 1.95,
    z = 11.05812,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  layout(
    scene = list(
      aspectmode = 'manual', # Set to manual for custom aspect ratio
      aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
      xaxis = list(
        title = "signal variance"
      ),
      yaxis = list(
        title = "length-scale parameter"
      ),
      zaxis = list(
        title = "-log(marginal likelihood)"
      )
    )
  )
fig
Next, increase the number of points that we are fitting, but within the originally specified range of the function \(x \in (-2, 2)\). Below, I have added 3 new points.
# n is 7 now; used to be 5. Same function y = x^2 on [-2, 2].
n_new <- 7
x_new <- matrix(seq(from = -2, to = 2, length.out = n_new), ncol = 1)
y_new <- x_new * x_new
Plot the data.
# Scatter plot of the 7 observations
ggplot() +
  geom_point(aes(x = x_new, y = y_new), size = 2) +
  labs(x = 'x', y = 'y')
Now, fit the model using gp_optim.
# Same prior specification as before, now for the 7-point data set
gp_empty_7 <- gp_init(
  # Squared exponential (aka Gaussian aka RBF) kernel,
  # log-uniform priors on both hyperparameters
  cfs = cf_sexp(vars = NULL, lscale = 0.3, magn = 1,
                prior_lscale = prior_logunif(),
                prior_magn = prior_logunif(),
                normalize = FALSE),
  # Gaussian observation errors
  lik = lik_gaussian(sigma = 0.5, prior_sigma = prior_logunif()),
  # Full covariance, no approximation
  method = method_full()
)
Optimize.
# Optimize the hyperparameters with 5 restarts, printing the trace below
gp_optimized_7 <- gp_optim(gp_empty_7, x = x_new, y = y_new,
                           restarts = 5, verbose = TRUE)
## Optimizing parameters
## p1: log cf_sexp.lscale
## p2: log cf_sexp.magn
## p3: log lik_gaussian.sigma
##
## p1 p2 p3 Energy Iteration
## -1.20 0.00 -0.69 21.92 0
## -1.08 0.00 -0.69 21.45 1
## -1.20 0.12 -0.69 19.95 2
## -1.20 0.00 -0.57 21.38 3
## -1.12 0.08 -0.61 20.04 4
## -1.14 0.06 -0.63 20.47 5
## -1.27 0.13 -0.56 19.57 6
## -1.36 0.20 -0.49 18.77 7
## -1.26 0.27 -0.63 18.15 8
## -1.28 0.40 -0.65 17.09 9
## -1.44 0.40 -0.61 17.24 10
## -1.36 0.32 -0.61 17.81 11
## -1.53 0.55 -0.48 16.39 12
## -1.69 0.76 -0.37 15.93 13
## -1.58 0.84 -0.60 15.91 14
## -1.69 1.16 -0.65 16.51 15
## -1.59 0.94 -0.47 15.99 16
## -1.55 0.80 -0.51 15.90 17
## -1.93 1.20 -0.33 16.68 18
## -1.77 1.00 -0.41 16.11 19
## -1.44 0.60 -0.57 16.20 20
## -1.69 0.90 -0.45 15.96 21
## -1.53 0.70 -0.53 15.98 22
## -1.65 0.85 -0.47 15.92 23
## -1.50 0.90 -0.68 15.92 24
## -1.55 0.87 -0.60 15.91 25
## -1.47 0.82 -0.67 15.88 26
## -1.38 0.81 -0.76 15.84 27
## -1.46 0.77 -0.64 15.90 28
## -1.48 0.79 -0.63 15.89 29
## -1.37 0.76 -0.67 15.85 30
## -1.42 0.78 -0.65 15.87 31
## -1.27 0.77 -0.87 15.77 32
## -1.13 0.75 -1.05 15.61 33
## -1.11 0.75 -1.02 15.57 34
## -0.92 0.74 -1.22 15.23 35
## -0.92 0.77 -1.36 15.23 36
## -0.70 0.78 -1.70 14.68 37
## -0.45 0.71 -1.88 13.94 38
## 0.02 0.65 -2.44 12.98 39
## 0.07 0.69 -2.52 12.67 40
## 0.67 0.66 -3.26 23.11 41
## 0.51 0.68 -3.22 16.47 42
## -0.56 0.72 -1.72 14.28 43
## 0.38 0.60 -2.76 15.45 44
## -0.43 0.73 -1.96 13.84 45
## 0.33 0.66 -2.90 13.68 46
## 0.11 0.68 -2.60 12.75 47
## 0.56 0.62 -3.08 19.80 48
## -0.18 0.70 -2.24 13.12 49
## 0.31 0.65 -2.80 13.75 50
## -0.06 0.69 -2.38 12.90 51
## 0.06 0.72 -2.56 12.53 52
## 0.08 0.75 -2.62 12.32 53
## 0.23 0.73 -2.78 12.41 54
## 0.16 0.72 -2.68 12.44 55
## 0.14 0.77 -2.68 12.10 56
## 0.16 0.81 -2.72 11.80 57
## 0.25 0.84 -2.89 11.58 58
## 0.34 0.91 -3.07 11.12 59
## 0.16 0.92 -2.83 11.37 60
## 0.18 0.87 -2.81 11.50 61
## 0.36 1.01 -3.12 10.46 62
## 0.51 1.14 -3.37 9.80 63
## 0.51 1.17 -3.47 9.61 64
## 0.68 1.34 -3.84 9.01 65
## 0.86 1.34 -4.03 11.88 66
## 0.34 1.03 -3.13 10.42 67
## 0.68 1.43 -3.82 8.37 68
## 0.84 1.69 -4.19 7.38 69
## 1.02 1.75 -4.48 8.62 70
## 0.85 1.57 -4.14 8.35 71
## 1.08 1.93 -4.74 7.29 72
## 1.36 2.33 -5.43 7.25 73
## 1.35 2.39 -5.33 6.40 74
## 1.68 2.91 -6.08 6.36 75
## 1.74 3.05 -6.33 5.62 76
## 2.19 3.78 -7.42 6.62 77
## 2.34 3.83 -7.70 13.26 78
## 1.22 2.23 -5.07 6.18 79
## 1.73 3.12 -6.22 4.56 80
## 1.92 3.52 -6.62 3.52 81
## 1.57 2.95 -5.93 3.85 82
## 1.60 2.94 -5.97 4.27 83
## 2.27 4.12 -7.52 3.61 84
## 2.00 3.65 -6.90 3.66 85
## 2.09 4.02 -7.05 1.80 86
## 2.27 4.50 -7.41 0.82 87
## 2.73 5.14 -8.43 1.79 88
## 2.44 4.59 -7.80 2.05 89
## 2.35 4.65 -7.45 0.77 90
## 2.39 4.92 -7.42 0.36 91
## 3.02 6.19 -8.89 -0.19 92
## 3.57 7.52 -10.02 -0.22 93
## 2.75 6.16 -8.14 0.75 94
## 2.75 5.90 -8.21 0.18 95
## 3.53 7.73 -9.70 0.67 96
## 3.22 6.92 -9.12 0.20 97
## 3.96 8.65 -10.81 1.04 98
## 2.78 5.85 -8.27 0.02 99
## 2.85 5.93 -8.54 -0.06 100
## 2.94 6.18 -8.69 -0.05 101
## 3.38 6.97 -9.68 -0.55 102
## 3.70 7.50 -10.41 -1.06 103
## 3.96 8.12 -11.05 -0.71 104
## 3.67 7.55 -10.36 -0.74 105
## 4.44 9.12 -11.98 0.71 106
## 3.25 6.73 -9.40 -0.34 107
## 3.51 6.99 -10.10 -1.11 108
## 3.49 6.73 -10.13 -1.24 109
## 3.99 7.79 -11.20 -1.46 110
## 4.36 8.33 -12.09 -0.99 111
## 3.79 7.13 -10.81 -1.62 112
## 3.85 6.92 -11.03 -0.68 113
## 3.81 6.94 -11.01 -1.11 114
## 3.78 7.08 -10.86 -1.57 115
## 4.22 7.94 -11.78 -1.15 116
## 3.67 7.03 -10.54 -1.59 117
## 3.50 6.37 -10.28 -0.76 118
## 3.87 7.44 -10.97 -1.63 119
## 3.77 7.32 -10.68 -1.59 120
## 3.77 7.26 -10.73 -1.64 121
## 3.95 7.52 -11.13 -1.59 122
## 3.88 7.40 -10.98 -1.64 123
## 3.89 7.60 -10.98 -1.54 124
## 3.81 7.25 -10.85 -1.65 125
## 3.77 7.17 -10.74 -1.65 126
## 3.80 7.24 -10.80 -1.66 127
## 3.71 7.10 -10.60 -1.62 128
## 3.84 7.32 -10.89 -1.65 129
## 3.86 7.28 -10.96 -1.61 130
## 3.79 7.27 -10.79 -1.65 131
## 3.81 7.30 -10.80 -1.65 132
## 3.81 7.26 -10.84 -1.66 133
## 3.77 7.19 -10.73 -1.65 134
## 3.82 7.29 -10.85 -1.66 135
## Assessing convergence...
## 3.90 7.24 -10.80 -1.42 136
## 3.70 7.24 -10.80 -1.50 137
## 3.80 7.34 -10.80 -1.63 138
## 3.80 7.14 -10.80 -1.60 139
## 3.80 7.24 -10.70 -1.66 140
## 3.80 7.24 -10.90 -1.66 141
## Optimization converged.
Optimal value of \(\sigma_f^2 = 7.24\) and optimal value of \(\ell=3.8\).
Pull the energy.
# Energy of the optimized 7-point model -- far lower than the 11.887 above
gp_energy(gp_optimized_7)
## [1] -1.65625
Plot the optimal model.
# Predict over a much wider grid than the data range to show extrapolation
xt_more <- seq(-10, 10, length.out = 150)  # spell out length.out; avoid partial matching via `len`
pred_7 <- gp_pred(gp = gp_optimized_7,
                  xnew = xt_more,
                  var = TRUE)
# Predictive mean with a +/- 2 sd band
mu_pred_7 <- pred_7$mean
lb_pred_7 <- pred_7$mean - 2 * sqrt(pred_7$var)
ub_pred_7 <- pred_7$mean + 2 * sqrt(pred_7$var)
ggplot() +
  geom_ribbon(aes(x = xt_more, ymin = lb_pred_7, ymax = ub_pred_7), fill = 'lightgray') +
  geom_line(aes(x = xt_more, y = mu_pred_7), linewidth = 0.5) +
  geom_point(aes(x = x_new, y = y_new), size = 2) +
  xlab('x') + ylab('y')
Recall, optimal value of \(\sigma_f^2 = 7.24\) and optimal value of \(\ell=3.8\). We will set \(\sigma_n^2 = 0.01\).
# using prior optimization, set sequence for the length-scale
ell <- seq(1, 6, by = 0.05)
# using prior optimization, set sequence for the signal magnitude
sigma_f <- seq(4, 10, by = 0.05)
hyperparams <- expand.grid(
  ell = ell,
  sigma_f = sigma_f
)
# preallocate the energy vector
energy <- numeric(nrow(hyperparams))
# run 6000+ models on the 7-point data with fixed noise sd = 0.01
for (i in seq_len(nrow(hyperparams))) {
  gp_empty <- gp_init(
    # A squared exponential (aka Gaussian aka RBF) kernel
    cfs = cf_sexp(
      vars = NULL,
      lscale = hyperparams$ell[i],
      magn = hyperparams$sigma_f[i],
      prior_lscale = prior_logunif(),
      prior_magn = prior_logunif(),
      normalize = FALSE
    ),
    # Assume Gaussian distributed errors
    lik = lik_gaussian(
      sigma = 0.01,
      prior_sigma = prior_logunif()
    ),
    # Use the full covariance (i.e., do not approximate)
    method = method_full()
  )
  # Condition on the data without optimizing, then record the energy
  gp_raw <- gp_fit(gp_empty, x_new, y_new)
  energy[i] <- gp_energy(gp_raw)
}
# Attach the energies to the grid and report the minimum-energy setting
surface_plot <- cbind(hyperparams, energy)
surface_plot[which.min(surface_plot$energy),]
## ell sigma_f energy
## 12159 2.9 10 5.433649
Is the range of energies lower than before?
# Grid minimum (5.43) is below the 5-point grid minimum (11.06)
range(surface_plot$energy)
## [1] 5.433649 97.868111
Plot the surface.
# Derive the grid dimensions from the data instead of hard-coding 101 x 121,
# so the plot stays correct if the seq() grids above are changed
n_ell <- length(unique(surface_plot$ell))
n_sigma_f <- length(unique(surface_plot$sigma_f))
fig <- plot_ly() %>%
  add_trace(
    x = unique(surface_plot$sigma_f),
    y = unique(surface_plot$ell),
    # expand.grid varies ell fastest, so rows index ell and columns sigma_f
    z = matrix(surface_plot$energy, nrow = n_ell, ncol = n_sigma_f),
    type = "surface") %>%
  add_trace(
    # mark the minimum-energy grid point found above
    x = 10,
    y = 2.9,
    z = 5.433,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  layout(
    scene = list(
      aspectmode = 'manual', # Set to manual for custom aspect ratio
      aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
      xaxis = list(
        title = "signal variance"
      ),
      yaxis = list(
        title = "length-scale parameter"
      ),
      zaxis = list(
        title = "-log(marginal likelihood)"
      )
    )
  )
fig
Plot the surface with lowest energies.
# Zoom in on the low-energy region; derive full grid dims from the data
# instead of hard-coding 101 x 121
n_ell <- length(unique(surface_plot$ell))
n_sigma_f <- length(unique(surface_plot$sigma_f))
energy_grid <- matrix(surface_plot$energy, nrow = n_ell, ncol = n_sigma_f)
fig <- plot_ly() %>%
  add_trace(
    x = unique(surface_plot$sigma_f),
    y = unique(surface_plot$ell)[1:45],
    z = energy_grid[1:45, ],
    type = "surface") %>%
  add_trace(
    # mark the minimum-energy grid point found above
    x = 10,
    y = 2.9,
    z = 5.433,
    type = "scatter3d",
    mode = "markers"
  ) %>%
  layout(
    scene = list(
      aspectmode = 'manual', # Set to manual for custom aspect ratio
      aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
      xaxis = list(
        title = "signal variance"
      ),
      yaxis = list(
        title = "length-scale parameter"
      ),
      zaxis = list(
        title = "-log(marginal likelihood)"
      )
    )
  )
fig
\(n=7\) works, but \(n=8\) or \(n=9\) does not.
# n is 8 now. Same function y = x^2 on [-2, 2].
n_new <- 8
x_new <- matrix(seq(from = -2, to = 2, length.out = n_new), ncol = 1)
y_new <- x_new * x_new
Plot the data.
# Scatter plot of the 8 observations
ggplot() +
  geom_point(aes(x = x_new, y = y_new), size = 2) +
  labs(x = 'x', y = 'y')
Now, fit the model using gp_optim.
# Same prior specification as before, now for the 8-point data set
gp_empty_8 <- gp_init(
  # Squared exponential (aka Gaussian aka RBF) kernel,
  # log-uniform priors on both hyperparameters
  cfs = cf_sexp(vars = NULL, lscale = 0.3, magn = 1,
                prior_lscale = prior_logunif(),
                prior_magn = prior_logunif(),
                normalize = FALSE),
  # Gaussian observation errors
  lik = lik_gaussian(sigma = 0.5, prior_sigma = prior_logunif()),
  # Full covariance, no approximation
  method = method_full()
)
Optimize.
# This optimization errors out (see the Cholesky failure below):
# the covariance matrix is no longer numerically positive definite
gp_optimized_8 <- gp_optim(gp_empty_8, x = x_new, y = y_new,
                           restarts = 5, verbose = FALSE)
## Error in chol.default(B): the leading minor of order 4 is not positive